-
debuggerd 简介
Android系统自带一个实用的程序异常退出诊断daemon:debuggerd。此进程可以侦测到程序崩溃,并将崩溃时的进程状态信息输出到文件和串口中,以供开发人员分析调试使用。debuggerd的数据被保存在/data/tombstones/目录下,共可保存10个文件,当超过10个时,会覆盖重写最早生成的文件。串口中,则直接用DEBUG的tag输出logcat信息。Linux kernel有自己的一套signal机制,在应用程序崩溃时,通常系统内核都会发送signal到出问题的进程,以通知进程出现了什么异常,这些进程可以捕获这些signal并对其做相应的处理。通常对于程序异常信号的处理,就是退出。Android在此机制上实现了一个更实用的功能:拦截这些信号,dump进程信息以供调试。
debuggerd的运行原理
debuggerd创建一个名为“android:debuggerd”的socket,作为server端等待其他client端进程的连接,接收client端进程发送来的tid和action信息,将由tid指定的那个进程的运行信息,按照由action指定的动作dump到文件或者控制台中。可以作为debuggerd的client端的进程主要有以下几种:
1. 异常的C/C++程序
这种程序由bionic的linker安装异常信号的处理函数,当程序产生异常信号时,进入信号处理函数,与debuggerd建立连接。
2. debuggerd程序
debuggerd可以在控制台中以命令debuggerd -b [<tid>]启动,然后与debuggerd daemon建立连接。这样debuggerd可以在不中断进程执行的情况下dump由tid指定的进程的信息。
3. dumpstate
控制台中运行命令dumpstate,并指定必要的参数,命令中会调用dump_backtrace_to_file与debuggerd交互。
debuggerd的使用方法
产生异常信号的C/C++程序与debuggerd建立连接后,debuggerd将进程信息dump到tombstone_XX文件中,保存到/data/tombstones/文件夹下。可通过查看tombstone_XX分析异常进程的堆栈信息。
在控制台中以命令debuggerd -b [<tid>]启动。如果加上-b参数,则由tid指定的进程的信息将dump到控制台上,否则dump到tombstone文件中。在控制台中运行命令callstack/dumpstate时,进程信息会写入这两个命令指定的文件中。
应用程序异常处理过程
应用程序入口属于bionic实现的一部分,因此对所有Android程序有效。在应用程序入口地址__start后,__linker_init中调用debugger_init()函数来注册异常信号处理handler,以实现拦截系统异常的几个signal:SIGILL、SIGABRT、SIGBUS、SIGFPE、SIGSEGV、SIGSTKFLT和SIGPIPE:
linker/arch/arm/begin.S
/* Linker entry stub: passes the stack pointer to __linker_init. */
start:
    mov r0, sp          /* r0 = current stack pointer */
    mov r1, #0          /* r1 = 0 */
    bl  __linker_init   /* call into the linker's initialisation routine */
bionic\linker\ Linker.cpp
1234567extern
"C"
Elf32_Addr __linker_init(
void
* raw_args) {
…
Elf32_Addr start_address = __linker_init_post_relocation(args, linker_addr);
set_soinfo_pool_protection(PROT_READ);
// Return the address that the calling assembly stub should jump to.
return
start_address;
}
12345static
Elf32_Addr __linker_init_post_relocation(KernelArgumentBlock& args, Elf32_Addr linker_base) {
...
debuggerd_init();
...
}
bionic\linker\Debugger.c
123456789101112131415void
debugger_init()
{
struct sigaction act;
memset(&act,
0
, sizeof(act));
act.sa_sigaction = debugger_signal_handler;
act.sa_flags = SA_RESTART | SA_SIGINFO;
sigemptyset(&act.sa_mask);
sigaction(SIGILL, &act, NULL);
sigaction(SIGABRT, &act, NULL);
sigaction(SIGBUS, &act, NULL);
sigaction(SIGFPE, &act, NULL);
sigaction(SIGSEGV, &act, NULL);
sigaction(SIGSTKFLT, &act, NULL);
sigaction(SIGPIPE, &act, NULL);
}
bionic库中的链接器会对以下七种信号设置Handler(debugger_signal_handler):
-
SIGILL(非法指令异常) SIGABRT(abort退出异常) SIGBUS(硬件访问异常) SIGFPE(浮点运算异常) SIGSEGV(内存访问异常) SIGSTKFLT(协处理器栈异常) SIGPIPE(管道异常)
debugger_init中act.sa_flags = SA_RESTART | SA_SIGINFO的涵义:
1) SA_RESTART
如果指定该参数,表示若信号中断了进程的某个系统调用,则系统自动重启该系统调用。如果不指定该参数,则被中断的系统调用返回失败,错误码为EINTR。这个标志位主要用于处理慢系统调用(可能会被阻塞的系统调用)。比如调用write系统调用写某个设备被阻塞,这时进程捕获某个信号且进入相应信号处理函数,返回时该系统调用可能要返回EINTR错误。指定这个参数后,系统调用会重启,与RETRY_ON_EINTR宏配合使用则可以保证写操作的完成。
2) SA_SIGINFO
如果指定该参数,表示信号附带的参数(siginfo_t结构体)可以被传递到信号处理函数中。
链接到bionic库上的C/C++程序崩溃时,内核会发送相应的signal,进程收到异常信号后,会转入debugger_signal_handler函数中进行处理。
1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768void
debugger_signal_handler(
int
n, siginfo_t* info,
void
* unused)
{
char
msgbuf[
128
];
unsigned tid;
int
s;
logSignalSummary(n, info);
tid = gettid();
//"android:debuggerd"
s = socket_abstract_client(DEBUGGER_SOCKET_NAME, SOCK_STREAM);
if
(s >=
0
) {
/* debugger knows our pid from the credentials on the
* local socket but we need to tell it our tid. It
* is paranoid and will verify that we are giving a tid
* that's actually in our process
*/
int
ret;
debugger_msg_t msg;
msg.action = DEBUGGER_ACTION_CRASH;
msg.tid = tid;
RETRY_ON_EINTR(ret, write(s, &msg, sizeof(msg)));
if
(ret == sizeof(msg)) {
/* if the write failed, there is no point to read on
* the file descriptor. */
RETRY_ON_EINTR(ret, read(s, &tid,
1
));
int
savedErrno = errno;
notify_gdb_of_libraries();
errno = savedErrno;
}
if
(ret <
0
) {
/* read or write failed -- broken connection? */
format_buffer(msgbuf, sizeof(msgbuf),
"Failed while talking to debuggerd: %s"
, strerror(errno));
__libc_android_log_write(ANDROID_LOG_FATAL,
"libc"
, msgbuf);
}
close(s);
}
else
{
/* socket failed; maybe process ran out of fds */
format_buffer(msgbuf, sizeof(msgbuf),
"Unable to open connection to debuggerd: %s"
, strerror(errno));
__libc_android_log_write(ANDROID_LOG_FATAL,
"libc"
, msgbuf);
}
/* remove our net so we fault for real when we return */
signal(n, SIG_DFL);
/*
* These signals are not re-thrown when we resume. This means that
* crashing due to (say) SIGPIPE doesn't work the way you'd expect it
* to. We work around this by throwing them manually. We don't want
* to do this for *all* signals because it'll screw up the address for
* faults like SIGSEGV.
*/
switch
(n) {
case
SIGABRT:
case
SIGFPE:
case
SIGPIPE:
case
SIGSTKFLT:
(
void
) tgkill(getpid(), gettid(), n);
break
;
default
:
// SIGILL, SIGBUS, SIGSEGV
break
;
}
}
debugger_signal_handler函数处理流程:
1) 调用logSignalSummary将signal信息写入文件;
123456789101112131415161718192021222324252627282930static
void
logSignalSummary(
int
signum,
const
siginfo_t* info)
{
char
buffer[
128
];
char
threadname[MAX_TASK_NAME_LEN +
1
];
// one more for termination
char
* signame;
switch
(signum) {
case
SIGILL: signame =
"SIGILL"
;
break
;
case
SIGABRT: signame =
"SIGABRT"
;
break
;
case
SIGBUS: signame =
"SIGBUS"
;
break
;
case
SIGFPE: signame =
"SIGFPE"
;
break
;
case
SIGSEGV: signame =
"SIGSEGV"
;
break
;
case
SIGSTKFLT: signame =
"SIGSTKFLT"
;
break
;
case
SIGPIPE: signame =
"SIGPIPE"
;
break
;
default
: signame =
"???"
;
break
;
}
if
(prctl(PR_GET_NAME, (unsigned
long
)threadname,
0
,
0
,
0
) !=
0
) {
strcpy(threadname,
"<name unknown="
">"
);
}
else
{
// short names are null terminated by prctl, but the manpage
// implies that 16 byte names are not.
threadname[MAX_TASK_NAME_LEN] =
0
;
}
format_buffer(buffer, sizeof(buffer),
"Fatal signal %d (%s) at 0x%08x (code=%d), thread %d (%s)"
,
signum, signame, info->si_addr, info->si_code, gettid(), threadname);
__libc_android_log_write(ANDROID_LOG_FATAL,
"libc"
, buffer);
}
</name>
获取异常信号的名字和thread名字,并格式化字符串,调用函数__libc_android_log_write函数写入”/dev/log/main”中。
2) 调用socket_abstract_client函数与debuggerd建立socket连接;
1s = socket_abstract_client(DEBUGGER_SOCKET_NAME, SOCK_STREAM);
3) 如果连接建立成功,则设置结构体debugger_msg_t,并发送给debuggerd;
msg.action = DEBUGGER_ACTION_CRASH;   // tells debuggerd which action to take
msg.tid = tid;                        // thread id
RETRY_ON_EINTR(ret, write(s, &msg, sizeof(msg)));
4) 等待debuggerd的回复,阻塞在下面的调用中,收到回复后接着执行下面的流程;
1RETRY_ON_EINTR(ret, read(s, &tid,
1
));
5) 重新设置信号处理函数为SIG_DFL,即采取默认的动作;
signal(n, SIG_DFL);
6) 重新发送信号,进程从当前信号处理函数返回后,会处理这个信号,进行默认的信号处理动作,即中断进程。
debuggerd的源码分析
1. 在init进程中以daemon的方式启动,在init.rc中:
service debuggerd /system/bin/debuggerd
    class main
以这种方式启动的话,进入main函数后,将调用do_server函数,作为server端为其他进程提供dump进程信息的服务。
2. 直接运行system/bin/debuggerd可执行文件,需要指定参数,用法为:
debuggerd -b [<tid>]
//参数-b表示在控制台中输出backtrace
以这种方式启动的话,进入main函数后,将调用do_explicit_dump函数与debuggerd daemon通信,将指定进程的信息dump到文件或控制台。
<img src="http://www.2cto.com/uploadfile/Collfiles/20140807/20140807091336144.png" alt="">
后台服务方式
int main(int argc, char** argv) {
    if (argc == 1) {
        return do_server();
    }
}
当启动debuggerd进程传递的参数个数为1时,此时启动的debuggerd将作为一个后台服务进程,专门接收应用程序异常退出消息而产生tombstone。
1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859static
int
do_server() {
int
s;
struct sigaction act;
int
logsocket = -
1
;
/*
* debuggerd crashes can't be reported to debuggerd. Reset all of the
* crash handlers.
*/
signal(SIGILL, SIG_DFL);
signal(SIGABRT, SIG_DFL);
signal(SIGBUS, SIG_DFL);
signal(SIGFPE, SIG_DFL);
signal(SIGSEGV, SIG_DFL);
signal(SIGPIPE, SIG_IGN);
signal(SIGSTKFLT, SIG_DFL);
logsocket = socket_local_client(
"logd"
,
ANDROID_SOCKET_NAMESPACE_ABSTRACT, SOCK_DGRAM);
if
(logsocket <
0
) {
logsocket = -
1
;
}
else
{
fcntl(logsocket, F_SETFD, FD_CLOEXEC);
}
act.sa_handler = SIG_DFL;
sigemptyset(&act.sa_mask);
sigaddset(&act.sa_mask,SIGCHLD);
act.sa_flags = SA_NOCLDWAIT;
sigaction(SIGCHLD, &act,
0
);
s = socket_local_server(DEBUGGER_SOCKET_NAME,
ANDROID_SOCKET_NAMESPACE_ABSTRACT, SOCK_STREAM);
if
(s <
0
)
return
1
;
fcntl(s, F_SETFD, FD_CLOEXEC);
LOG(
"debuggerd: "
__DATE__
" "
__TIME__
"\n"
);
//check corefile limit.
(
void
)check_corefile_limit();
for
(;;) {
struct sockaddr addr;
socklen_t alen;
int
fd;
alen = sizeof(addr);
XLOG(
"waiting for connection\n"
);
fd = accept(s, &addr, &alen);
if
(fd <
0
) {
XLOG(
"accept failed: %s\n"
, strerror(errno));
continue
;
}
fcntl(fd, F_SETFD, FD_CLOEXEC);
handle_request(fd);
}
return
0
;
}
1. 忽略debuggerd自身crash的处理;
2. 建立socket通信的server端;
3. 进入无限循环中,等待并接收客户端进程连接请求,并通过handle_request()函数处理请求;
handle_request
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150static
void
handle_request(
int
fd) {
XLOG(
"handle_request(%d)\n"
, fd);
debugger_request_t request;
int
status = read_request(fd, &request);
if
(!status) {
XLOG(
"BOOM: pid=%d uid=%d gid=%d tid=%d\n"
,
request.pid, request.uid, request.gid, request.tid);
/* At this point, the thread that made the request is blocked in
* a read() call. If the thread has crashed, then this gives us
* time to PTRACE_ATTACH to it before it has a chance to really fault.
*
* The PTRACE_ATTACH sends a SIGSTOP to the target process, but it
* won't necessarily have stopped by the time ptrace() returns. (We
* currently assume it does.) We write to the file descriptor to
* ensure that it can run as soon as we call PTRACE_CONT below.
* See details in bionic/libc/linker/debugger.c, in function
* debugger_signal_handler().
*/
if
(ptrace(PTRACE_ATTACH, request.tid,
0
,
0
)) {
LOG(
"ptrace attach failed: %s\n"
, strerror(errno));
}
else
{
bool detach_failed =
false
;
bool attach_gdb = should_attach_gdb(&request);
if
(TEMP_FAILURE_RETRY(write(fd,
"\0"
,
1
)) !=
1
) {
LOG(
"failed responding to client: %s\n"
, strerror(errno));
}
else
{
char
* tombstone_path = NULL;
if
(request.action == DEBUGGER_ACTION_CRASH) {
close(fd);
fd = -
1
;
}
int
total_sleep_time_usec =
0
;
for
(;;) {
int
signal = wait_for_signal(request.tid, &total_sleep_time_usec);
if
(signal <
0
) {
break
;
}
switch
(signal) {
case
SIGSTOP:
if
(request.action == DEBUGGER_ACTION_DUMP_TOMBSTONE) {
XLOG(
"stopped -- dumping to tombstone\n"
);
tombstone_path = engrave_tombstone(request.pid, request.tid,
signal,
true
,
true
, &detach_failed,
&total_sleep_time_usec);
}
else
if
(request.action == DEBUGGER_ACTION_DUMP_BACKTRACE) {
XLOG(
"stopped -- dumping to fd\n"
);
dump_backtrace(fd, request.pid, request.tid, &detach_failed,
&total_sleep_time_usec);
}
else
{
XLOG(
"stopped -- continuing\n"
);
status = ptrace(PTRACE_CONT, request.tid,
0
,
0
);
if
(status) {
LOG(
"ptrace continue failed: %s\n"
, strerror(errno));
}
continue
;
/* loop again */
}
break
;
case
SIGILL:
case
SIGABRT:
case
SIGBUS:
case
SIGFPE:
case
SIGSEGV:
case
SIGSTKFLT: {
XLOG(
"stopped -- fatal signal\n"
);
/*
* Send a SIGSTOP to the process to make all of
* the non-signaled threads stop moving. Without
* this we get a lot of "ptrace detach failed:
* No such process".
*/
kill(request.pid, SIGSTOP);
/* don't dump sibling threads when attaching to GDB because it
* makes the process less reliable, apparently... */
tombstone_path = engrave_tombstone(request.pid, request.tid,
signal, !attach_gdb,
false
, &detach_failed,
&total_sleep_time_usec);
break
;
}
case
SIGPIPE:
LOG(
"socket-client process stopped due to SIGPIPE! \n"
);
break
;
default
:
XLOG(
"stopped -- unexpected signal\n"
);
LOG(
"process stopped due to unexpected signal %d\n"
, signal);
break
;
}
break
;
}
if
(request.action == DEBUGGER_ACTION_DUMP_TOMBSTONE) {
if
(tombstone_path) {
write(fd, tombstone_path, strlen(tombstone_path));
}
close(fd);
fd = -
1
;
}
free(tombstone_path);
}
XLOG(
"detaching\n"
);
if
(attach_gdb) {
/* stop the process so we can debug */
kill(request.pid, SIGSTOP);
/* detach so we can attach gdbserver */
if
(ptrace(PTRACE_DETACH, request.tid,
0
,
0
)) {
LOG(
"ptrace detach from %d failed: %s\n"
, request.tid, strerror(errno));
detach_failed =
true
;
}
/*
* if debug.db.uid is set, its value indicates if we should wait
* for user action for the crashing process.
* in this case, we log a message and turn the debug LED on
* waiting for a gdb connection (for instance)
*/
wait_for_user_action(request.pid);
}
else
{
/* just detach */
if
(ptrace(PTRACE_DETACH, request.tid,
0
,
0
)) {
LOG(
"ptrace detach from %d failed: %s\n"
, request.tid, strerror(errno));
detach_failed =
true
;
}
}
/* resume stopped process (so it can crash in peace). */
kill(request.pid, SIGCONT);
/* If we didn't successfully detach, we're still the parent, and the
* actual parent won't receive a death notification via wait(2). At this point
* there's not much we can do about that. */
if
(detach_failed) {
LOG(
"debuggerd committing suicide to free the zombie!\n"
);
kill(getpid(), SIGKILL);
}
}
}
if
(fd >=
0
) {
close(fd);
}
}
1) 调用read_request函数读取client端进程发送来的数据:
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566static
int
read_request(
int
fd, debugger_request_t* out_request) {
struct ucred cr;
int
len = sizeof(cr);
int
status = getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &cr, &len);
if
(status !=
0
) {
LOG(
"cannot get credentials\n"
);
return
-
1
;
}
XLOG(
"reading tid\n"
);
fcntl(fd, F_SETFL, O_NONBLOCK);
struct pollfd pollfds[
1
];
pollfds[
0
].fd = fd;
pollfds[
0
].events = POLLIN;
pollfds[
0
].revents =
0
;
status = TEMP_FAILURE_RETRY(poll(pollfds,
1
,
3000
));
if
(status !=
1
) {
LOG(
"timed out reading tid\n"
);
return
-
1
;
}
debugger_msg_t msg;
status = TEMP_FAILURE_RETRY(read(fd, &msg, sizeof(msg)));
if
(status <
0
) {
LOG(
"read failure? %s\n"
, strerror(errno));
return
-
1
;
}
if
(status != sizeof(msg)) {
LOG(
"invalid crash request of size %d\n"
, status);
return
-
1
;
}
out_request->action = msg.action;
out_request->tid = msg.tid;
out_request->pid = cr.pid;
out_request->uid = cr.uid;
out_request->gid = cr.gid;
if
(msg.action == DEBUGGER_ACTION_CRASH) {
/* Ensure that the tid reported by the crashing process is valid. */
char
buf[
64
];
struct stat s;
snprintf(buf, sizeof buf,
"/proc/%d/task/%d"
, out_request->pid, out_request->tid);
if
(stat(buf, &s)) {
LOG(
"tid %d does not exist in pid %d. ignoring debug request\n"
,
out_request->tid, out_request->pid);
return
-
1
;
}
}
else
if
(cr.uid ==
0
|| (cr.uid == AID_SYSTEM && msg.action == DEBUGGER_ACTION_DUMP_BACKTRACE)) {
/* Only root or system can ask us to attach to any process and dump it explicitly.
* However, system is only allowed to collect backtraces but cannot dump tombstones. */
status = get_process_info(out_request->tid, &out_request->pid,
&out_request->uid, &out_request->gid);
if
(status <
0
) {
LOG(
"tid %d does not exist. ignoring explicit dump request\n"
,
out_request->tid);
return
-
1
;
}
}
else
{
/* No one else is not allowed to dump arbitrary processes. */
return
-
1
;
}
return
0
;
}
• 从socket中读取client端进程的pid uid gid
1getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &cr, &len);
• 轮询socket句柄
12345struct pollfd pollfds[
1
];
pollfds[
0
].fd = fd;
pollfds[
0
].events = POLLIN;
pollfds[
0
].revents =
0
;
status = TEMP_FAILURE_RETRY(poll(pollfds,
1
,
3000
));
• 从socket上读取debugger_msg_t结构体
123456789101112131415debugger_msg_t msg;
status = TEMP_FAILURE_RETRY(read(fd, &msg, sizeof(msg)));
if
(status <
0
) {
LOG(
"read failure? %s\n"
, strerror(errno));
return
-
1
;
}
if
(status != sizeof(msg)) {
LOG(
"invalid crash request of size %d\n"
, status);
return
-
1
;
}
out_request->action = msg.action;
out_request->tid = msg.tid;
out_request->pid = cr.pid;
out_request->uid = cr.uid;
out_request->gid = cr.gid;
• 如果debugger_msg_t中设置的action为DEBUGGER_ACTION_CRASH,说明是crash的C/C++进程发来的请求,则判断传进来的tid是否有效。
1234567891011if
(msg.action == DEBUGGER_ACTION_CRASH) {
/* Ensure that the tid reported by the crashing process is valid. */
char
buf[
64
];
struct stat s;
snprintf(buf, sizeof buf,
"/proc/%d/task/%d"
, out_request->pid, out_request->tid);
if
(stat(buf, &s)) {
LOG(
"tid %d does not exist in pid %d. ignoring debug request\n"
,
out_request->tid, out_request->pid);
return
-
1
;
}
}
• 如果debugger_msg_t中设置的action不是DEBUGGER_ACTION_CRASH,说明是其他方式(如debuggerd命令行)发来的显式dump请求,则要求请求方必须为root权限,或者为system权限且action为DEBUGGER_ACTION_DUMP_BACKTRACE,然后再判断tid是否有效。
2) 从read_request返回后,调用ptrace函数attach到tid指定的进程,此时debuggerd将变为被attach进程的父进程,然后ptrace函数会向子进程发送SIGSTOP信号将子进程停下来。此时,父进程有机会检查子进程核心image和寄存器的值。
1ptrace(PTRACE_ATTACH, request.tid,
0
,
0
)
3) 调用下面的语句给client端子进程回复消息,使client端的进程能从read调用中返回。
1TEMP_FAILURE_RETRY(write(fd,
"\0"
,
1
)
4) 在for循环中等待子进程停止。
1int
signal = wait_for_signal(request.tid, &total_sleep_time_usec);
5) 子进程根据收到的不同信号、不同的action进行不同的处理
1234567891011121314151617181920212223242526272829303132333435363738394041switch
(signal) {
case
SIGSTOP:
if
(request.action == DEBUGGER_ACTION_DUMP_TOMBSTONE) {
XLOG(
"stopped -- dumping to tombstone\n"
);
tombstone_path = engrave_tombstone(request.pid, request.tid,
signal,
true
,
true
, &detach_failed,
&total_sleep_time_usec);
}
else
if
(request.action == DEBUGGER_ACTION_DUMP_BACKTRACE) {
XLOG(
"stopped -- dumping to fd\n"
);
dump_backtrace(fd, request.pid, request.tid, &detach_failed,
&total_sleep_time_usec);
}
else
{
XLOG(
"stopped -- continuing\n"
);
status = ptrace(PTRACE_CONT, request.tid,
0
,
0
);
if
(status) {
LOG(
"ptrace continue failed: %s\n"
, strerror(errno));
}
continue
;
/* loop again */
}
break
;
case
SIGILL:
case
SIGABRT:
case
SIGBUS:
case
SIGFPE:
case
SIGSEGV:
case
SIGSTKFLT: {
XLOG(
"stopped -- fatal signal\n"
);
kill(request.pid, SIGSTOP);
tombstone_path = engrave_tombstone(request.pid, request.tid,
signal, !attach_gdb,
false
, &detach_failed,
&total_sleep_time_usec);
break
;
}
case
SIGPIPE:
LOG(
"socket-client process stopped due to SIGPIPE! \n"
);
break
;
default
:
XLOG(
"stopped -- unexpected signal\n"
);
LOG(
"process stopped due to unexpected signal %d\n"
, signal);
break
;
}
• 子进程收到SIGSTOP说明进程并没有发生crash,根据action不同将进程信息写入tombstone文件或输出backtrace。
• 子进程收到SIGILL、SIGABRT、SIGBUS、SIGFPE、SIGSEGV、SIGSTKFLT这六种异常信号说明进程发生了crash,调用engrave_tombstone直接将dump的信息写到tombstone;收到SIGPIPE时只输出一条log。
6) 调用ptrace(PTRACE_DETACH, request.tid, 0, 0)解除对子进程的追踪;
12345678910111213if
(attach_gdb) {
kill(request.pid, SIGSTOP);
if
(ptrace(PTRACE_DETACH, request.tid,
0
,
0
)) {
LOG(
"ptrace detach from %d failed: %s\n"
, request.tid, strerror(errno));
detach_failed =
true
;
}
wait_for_user_action(request.pid);
}
else
{
if
(ptrace(PTRACE_DETACH, request.tid,
0
,
0
)) {
LOG(
"ptrace detach from %d failed: %s\n"
, request.tid, strerror(errno));
detach_failed =
true
;
}
}
如果运行了类似以下指令:adb shell setprop debug.db.uid 10000;则所有uid<10000的进程发生crash的时候attach_gdb为true,将停止crash进程,并调用ptrace(PTRACE_DETACH, request.tid, 0, 0) 解除对crash进程的追踪后,开始等待gdb的连接。
adb forward tcp:5039 tcp:5039
adb shell gdbserver :5039 --attach pid &
用户按下HOME或者VOLUME DOWN按键,可以使进程继续进行,自然crash
attach_gdb为false时,只会解除对子进程的追踪。
7) 调用kill(request.pid, SIGCONT)恢复被停止的子进程,并让其自然终止;
engrave_tombstone
12345678910111213141516171819202122232425char
* engrave_tombstone(pid_t pid, pid_t tid,
int
signal,
bool dump_sibling_threads, bool quiet, bool* detach_failed,
int
* total_sleep_time_usec) {
mkdir(TOMBSTONE_DIR,
0755
);
chown(TOMBSTONE_DIR, AID_SYSTEM, AID_SYSTEM);
//dump maps & check corefile limit .
dump_creash_maps(pid);
//creat maps file
int
fd;
char
* path = find_and_open_tombstone(&fd);
if
(!path) {
*detach_failed =
false
;
return
NULL;
}
log_t log;
log.tfd = fd;
log.quiet = quiet;
*detach_failed = dump_crash(&log, pid, tid, signal, dump_sibling_threads,
total_sleep_time_usec);
close(fd);
return
path;
}
对于crash的C/C++进程,主要通过这个函数dump进程信息
1.创建”/data/tombstones”文件夹并修改权限
2.调用函数find_and_open_tombstone,tombstone_XX文件最多10个,超过则覆盖最早的
3.调用dump_crash将所有信息dump到tombstone文件:
• dump_build_info(log);
• dump_thread_info(log, pid, tid, true);
• dump_fault_addr(log, tid, signal);
• dump_thread(context, log, tid, true, total_sleep_time_usec) dump进程的上下文信息
• dump_logs(log, pid, true);
• dump_sibling_thread_report(context, log, pid, tid, total_sleep_time_usec);
dump_backtrace
1234567891011121314151617181920212223242526272829303132333435void
dump_backtrace(
int
fd, pid_t pid, pid_t tid, bool* detach_failed,
int
* total_sleep_time_usec) {
log_t log;
log.tfd = fd;
log.quiet =
true
;
ptrace_context_t* context = load_ptrace_context(tid);
dump_process_header(&log, pid);
dump_thread(&log, tid, context,
true
, detach_failed, total_sleep_time_usec);
char
task_path[
64
];
snprintf(task_path, sizeof(task_path),
"/proc/%d/task"
, pid);
DIR* d = opendir(task_path);
if
(d) {
struct dirent debuf;
struct dirent *de;
while
(!readdir_r(d, &debuf, &de) && de) {
if
(!strcmp(de->d_name,
"."
) || !strcmp(de->d_name,
".."
)) {
continue
;
}
char
* end;
pid_t new_tid = strtoul(de->d_name, &end,
10
);
if
(*end || new_tid == tid) {
continue
;
}
dump_thread(&log, new_tid, context,
false
, detach_failed, total_sleep_time_usec);
}
closedir(d);
}
dump_process_footer(&log, pid);
free_ptrace_context(context);
}
• dump_process_header(&log, pid);
• dump_thread(&log, tid, context, true, detach_failed, total_sleep_time_usec);
• dump_process_footer(&log, pid);
调试工具方式
/*
 * Command-line (debugging tool) entry point as listed here.
 *
 * Accepts an optional "-b" flag and exactly one tid argument; prints usage
 * and returns 1 if no tid is given or an extra argument is present.
 * Otherwise returns the result of do_explicit_dump().
 */
int main(int argc, char** argv) {
    bool dump_backtrace = false;
    bool have_tid = false;
    pid_t tid = 0;

    for (int i = 1; i < argc; i++) {
        if (!strcmp(argv[i], "-b")) {
            dump_backtrace = true;
        } else if (!have_tid) {
            /* First non-flag argument is taken as the tid. */
            tid = atoi(argv[i]);
            have_tid = true;
        } else {
            usage();
            return 1;
        }
    }

    if (!have_tid) {
        usage();
        return 1;
    }
    return do_explicit_dump(tid, dump_backtrace);
}
通过do_explicit_dump函数dump出指定进程的栈信息等
12345678910111213141516171819static
int
do_explicit_dump(pid_t tid, bool dump_backtrace) {
fprintf(stdout,
"Sending request to dump task %d.\n"
, tid);
if
(dump_backtrace) {
fflush(stdout);
if
(dump_backtrace_to_file(tid, fileno(stdout)) <
0
) {
fputs(
"Error dumping backtrace.\n"
, stderr);
return
1
;
}
}
else
{
char
tombstone_path[PATH_MAX];
if
(dump_tombstone(tid, tombstone_path, sizeof(tombstone_path)) <
0
) {
fputs(
"Error dumping tombstone.\n"
, stderr);
return
1
;
}
fprintf(stderr,
"Tombstone written to: %s\n"
, tombstone_path);
}
return
0
;
}
• dump_backtrace_to_file(tid, fileno(stdout))
• dump_tombstone(tid, tombstone_path, sizeof(tombstone_path))
http://www.2cto.com/kf/201408/323895.html
-
SIGILL(非法指令异常) SIGABRT(abort退出异常) SIGBUS(硬件访问异常) SIGFPE(浮点运算异常) SIGSEGV(内存访问异常) SIGSTKFLT(协处理器栈异常) SIGPIPE(管道异常)
Android debuggerd 源码分析
最新推荐文章于 2024-05-03 22:01:23 发布